# Notebook setup: pull in shared notebook configuration (executed via the
# %run magic), then import the helpers used throughout this analysis.
%run "/code/source/notebooks/notebook_settings.py"
import logging
import helpsk as hlp
from helpsk.utility import read_pickle, Timer
from helpsk.sklearn_eval import MLExperimentResults
import source.config.config as config
from source.service.model_registry import ModelRegistry
logging.info("Running experiment notebook for last run.")
2022-09-19 23:32:55 - INFO | Running experiment notebook for last run.
# Connect to the model registry / tracking server, fetch this project's
# experiment, and log the identifying details and metric(s) of its latest run.
registry = ModelRegistry(tracking_uri=config.experiment_server_url())
experiment = registry.get_experiment_by_name(exp_name=config.experiment_name())
for _label, _value in [
    ('Experiment id', experiment.last_run.exp_id),
    ('Experiment name', experiment.last_run.exp_name),
    ('Run id', experiment.last_run.run_id),
    ('Metric(s)', experiment.last_run.metrics),
]:
    logging.info(f"{_label}: {_value}")
2022-09-19 23:32:55 - INFO | Experiment id: 1
2022-09-19 23:32:55 - INFO | Experiment name: credit
2022-09-19 23:32:55 - INFO | Run id: 332741281d294d80a3d25a6ff5911385
2022-09-19 23:32:55 - INFO | Metric(s): {'roc_auc': 0.7566610156276207}
What is the metric/performance of the model associated with the last run?
# Metric(s) recorded on the most recent experiment run.
logging.info(f"last run metrics: {experiment.last_run.metrics}")
2022-09-19 23:32:55 - INFO | last run metrics: {'roc_auc': 0.7566610156276207}
What is the metric/performance of the model in production?
# Look up the run currently registered as the production model and log its
# metric(s) so it can be compared against the latest experiment run.
production_run = registry.get_production_run(model_name=config.model_name())
logging.info(f"production run metrics: {production_run.metrics}")
2022-09-19 23:32:55 - INFO | production run metrics: {'roc_auc': 0.7566610156276207}
# underlying mlflow object
# Display the raw mlflow Run entity (params, tags, artifact URI, timings).
experiment.last_run.mlflow_entity
<Run: data=<RunData: metrics={'roc_auc': 0.7566610156276207}, params={'prep__non_numeric__encoder__transformer': "OneHotEncoder(handle_unknown='ignore')",
'prep__numeric__imputer__transformer': 'SimpleImputer()',
'prep__numeric__pca__transformer': 'None',
'prep__numeric__scaler__transformer': 'None'}, tags={'mlflow.log-model.history': '[{"run_id": "332741281d294d80a3d25a6ff5911385", '
'"artifact_path": "model", "utc_time_created": '
'"2022-09-19 23:31:39.008858", "flavors": '
'{"python_function": {"model_path": "model.pkl", '
'"loader_module": "mlflow.sklearn", '
'"python_version": "3.9.13", "env": '
'"conda.yaml"}, "sklearn": {"pickled_model": '
'"model.pkl", "sklearn_version": "1.1.1", '
'"serialization_format": "cloudpickle", "code": '
'null}}, "model_uuid": '
'"b5cae9a31b76437391c74123d7869a50", '
'"mlflow_version": "1.26.1"}]',
'mlflow.note.content': '2022_09_19_23_31_12',
'mlflow.runName': '2022_09_19_23_31_12',
'mlflow.source.git.commit': 'd8f551cfb135281ae402d29c4ba7f77359921efd',
'mlflow.source.name': 'source/entrypoints/cli.py',
'mlflow.source.type': 'LOCAL',
'mlflow.user': 'root',
'type': 'BayesSearchCV'}>, info=<RunInfo: artifact_uri='./mlflow-artifact-root/1/332741281d294d80a3d25a6ff5911385/artifacts', end_time=1663630308389, experiment_id='1', lifecycle_stage='active', run_id='332741281d294d80a3d25a6ff5911385', run_uuid='332741281d294d80a3d25a6ff5911385', start_time=1663630272486, status='FINISHED', user_id='root'>>
# Download the pickled train/test splits that were logged as run artifacts.
with Timer("Loading training/test datasets"):
    X_train, X_test, y_train, y_test = (
        experiment.last_run.download_artifact(artifact_name=fname, read_from=read_pickle)
        for fname in ('x_train.pkl', 'x_test.pkl', 'y_train.pkl', 'y_test.pkl')
    )
Timer Started: Loading training/test datasets Timer Finished (0.03 seconds)
# Sanity-check the dimensions of the downloaded train/test splits.
logging.info(f"training X shape: {X_train.shape}")
logging.info(f"training y length: {len(y_train)}")
logging.info(f"test X shape: {X_test.shape}")
logging.info(f"test y length: {len(y_test)}")
2022-09-19 23:32:55 - INFO | training X shape: (800, 20) 2022-09-19 23:32:55 - INFO | training y length: 800 2022-09-19 23:32:55 - INFO | test X shape: (200, 20) 2022-09-19 23:32:55 - INFO | test y length: 200
# Distinct class labels and their counts in the training target.
# NOTE(review): `np` is not imported in this file — presumably brought into
# scope by notebook_settings.py; verify.
np.unique(y_train, return_counts=True)
(array([0, 1]), array([559, 241]))
# Class balance of the training target: per-class counts normalized to
# proportions. Fix: the original called np.unique(..., return_counts=True)
# twice for the same array; compute the counts once and reuse them.
_, _train_counts = np.unique(y_train, return_counts=True)
train_y_proportion = _train_counts / _train_counts.sum()
logging.info(f"balance of y in training: {train_y_proportion}")
2022-09-19 23:32:55 - INFO | balance of y in training: [0.69875 0.30125]
# Class balance of the test target: per-class counts normalized to
# proportions. Fix: the original called np.unique(..., return_counts=True)
# twice for the same array; compute the counts once and reuse them.
_, _test_counts = np.unique(y_test, return_counts=True)
test_y_proportion = _test_counts / _test_counts.sum()
logging.info(f"balance of y in test: {test_y_proportion}")
2022-09-19 23:32:55 - INFO | balance of y in test: [0.705 0.295]
# Load the saved hyperparameter-search results from the run's YAML artifact
# and log the best cross-validated score and parameter combination.
results = experiment.last_run.download_artifact(
    artifact_name='experiment.yaml',
    read_from=MLExperimentResults.from_yaml_file
)
logging.info(f"Best Score: {results.best_score}")
logging.info(f"Best Params: {results.best_params}")
2022-09-19 23:32:55 - INFO | Best Score: 0.7566610156276207
2022-09-19 23:32:55 - INFO | Best Params: {'model': 'RandomForestClassifier()', 'imputer': 'SimpleImputer()', 'scaler': 'None', 'pca': 'None', 'encoder': 'OneHotEncoder()'}
# Best trial from each model type: rank trials within each model by mean
# cross-validated roc_auc (ties broken by first occurrence via method="first")
# and keep only the top-ranked row per model.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
ranks_within_model = df.groupby("model")["roc_auc Mean"].rank(
    method="first", ascending=False)
df["model_rank"] = ranks_within_model
df[df["model_rank"] == 1]
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | num_leaves | imputer | scaler | pca | encoder | model_rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 1 | 0.76 | 0.65 | 0.86 | RandomForestClassifier() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer() | None | None | OneHotEncoder() | 1.00 |
| 2 | 3 | 0.75 | 0.69 | 0.80 | LogisticRegression() | 0.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer(strategy='median') | MinMaxScaler() | None | OneHotEncoder() | 1.00 |
| 5 | 7 | 0.75 | 0.67 | 0.83 | ExtraTreesClassifier() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer() | None | None | OneHotEncoder() | 1.00 |
| 19 | 9 | 0.74 | 0.64 | 0.84 | XGBClassifier() | NaN | NaN | 1.00 | 1315.00 | NaN | NaN | NaN | NaN | 0.02 | 17.00 | 0.72 | 0.53 | 0.99 | 0.00 | 2.78 | NaN | SimpleImputer() | None | PCA('mle') | CustomOrdinalEncoder() | 1.00 |
| 21 | 12 | 0.74 | 0.66 | 0.81 | LGBMClassifier() | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.63 | 0.25 | NaN | 5.83 | 44.98 | 347.00 | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() | 1.00 |
# Styled table of every search trial (up to 500 rows), ranked by mean roc_auc.
results.to_formatted_dataframe(return_style=True,
                               include_rank=True,
                               num_rows=500)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | num_leaves | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.757 | 0.650 | 0.864 | RandomForestClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 2 | 0.750 | 0.646 | 0.855 | RandomForestClassifier() | <NA> | 0.239 | 41.000 | 1,886.000 | 3.000 | 15.000 | 0.864 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | None | OneHotEncoder() |
| 3 | 0.749 | 0.695 | 0.804 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | MinMaxScaler() | None | OneHotEncoder() |
| 4 | 0.749 | 0.619 | 0.879 | RandomForestClassifier() | <NA> | 0.583 | 35.000 | 1,474.000 | 22.000 | 5.000 | 0.765 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | PCA('mle') | CustomOrdinalEncoder() |
| 5 | 0.749 | 0.695 | 0.803 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | None | OneHotEncoder() |
| 6 | 0.749 | 0.694 | 0.804 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | MinMaxScaler() | PCA('mle') | OneHotEncoder() |
| 7 | 0.748 | 0.665 | 0.831 | ExtraTreesClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 8 | 0.744 | 0.641 | 0.847 | LogisticRegression() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 9 | 0.741 | 0.641 | 0.841 | XGBClassifier() | <NA> | <NA> | 1.000 | 1,315.000 | <NA> | <NA> | <NA> | <NA> | 0.022 | 17.000 | 0.716 | 0.530 | 0.986 | 0.002 | 2.778 | <NA> | SimpleImputer() | None | PCA('mle') | CustomOrdinalEncoder() |
| 10 | 0.740 | 0.640 | 0.839 | RandomForestClassifier() | <NA> | 0.445 | 87.000 | 1,244.000 | 33.000 | 27.000 | 0.795 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | None | OneHotEncoder() |
| 11 | 0.738 | 0.621 | 0.855 | RandomForestClassifier() | <NA> | 0.911 | 74.000 | 1,265.000 | 39.000 | 17.000 | 0.751 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | None | OneHotEncoder() |
| 12 | 0.737 | 0.661 | 0.813 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.630 | 0.248 | <NA> | 5.830 | 44.985 | 347.000 | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 13 | 0.733 | 0.597 | 0.870 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.732 | 0.746 | <NA> | 0.877 | 47.642 | 497.000 | SimpleImputer() | None | PCA('mle') | CustomOrdinalEncoder() |
| 14 | 0.732 | 0.682 | 0.782 | ExtraTreesClassifier() | <NA> | 0.502 | 29.000 | 1,249.000 | 25.000 | 35.000 | 0.855 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 15 | 0.728 | 0.695 | 0.760 | ExtraTreesClassifier() | <NA> | 0.135 | 15.000 | 1,987.000 | 10.000 | 39.000 | 0.708 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 16 | 0.725 | 0.635 | 0.815 | ExtraTreesClassifier() | <NA> | 0.768 | 54.000 | 909.000 | 16.000 | 30.000 | 0.762 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | None | OneHotEncoder() |
| 17 | 0.725 | 0.627 | 0.823 | XGBClassifier() | <NA> | <NA> | 3.000 | 1,482.000 | <NA> | <NA> | <NA> | <NA> | 0.067 | 18.000 | 0.889 | 0.636 | 0.615 | 0.000 | 2.093 | <NA> | SimpleImputer() | None | None | CustomOrdinalEncoder() |
| 18 | 0.723 | 0.623 | 0.823 | ExtraTreesClassifier() | <NA> | 0.537 | 2.000 | 1,275.000 | 14.000 | 47.000 | 0.805 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 19 | 0.723 | 0.628 | 0.817 | XGBClassifier() | <NA> | <NA> | 20.000 | 1,733.000 | <NA> | <NA> | <NA> | <NA> | 0.010 | 2.000 | 0.673 | 0.772 | 0.881 | 0.281 | 2.005 | <NA> | SimpleImputer(strategy='median') | None | None | OneHotEncoder() |
| 20 | 0.713 | 0.604 | 0.823 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 21 | 0.710 | 0.663 | 0.758 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.429 | 0.877 | <NA> | 13.997 | 35.364 | 388.000 | SimpleImputer() | None | None | CustomOrdinalEncoder() |
| 22 | 0.705 | 0.674 | 0.736 | LGBMClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | 0.965 | 0.241 | <NA> | 19.400 | 3.771 | 281.000 | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
| 23 | 0.702 | 0.659 | 0.745 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | MinMaxScaler() | None | CustomOrdinalEncoder() |
| 24 | 0.699 | 0.610 | 0.788 | XGBClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 25 | 0.689 | 0.555 | 0.823 | XGBClassifier() | <NA> | <NA> | 4.000 | 1,961.000 | <NA> | <NA> | <NA> | <NA> | 0.271 | 3.000 | 0.671 | 0.797 | 0.696 | 0.000 | 2.150 | <NA> | SimpleImputer(strategy='most_frequent') | None | PCA('mle') | OneHotEncoder() |
# Search trials restricted to RandomForestClassifier, re-ranked within the subset.
results.to_formatted_dataframe(query='model == "RandomForestClassifier()"', include_rank=True)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | imputer | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.757 | 0.650 | 0.864 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 2 | 0.750 | 0.646 | 0.855 | 0.239 | 41.000 | 1,886.000 | 3.000 | 15.000 | 0.864 | gini | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
| 3 | 0.749 | 0.619 | 0.879 | 0.583 | 35.000 | 1,474.000 | 22.000 | 5.000 | 0.765 | entropy | SimpleImputer(strategy='most_frequent') | PCA('mle') | CustomOrdinalEncoder() |
| 4 | 0.740 | 0.640 | 0.839 | 0.445 | 87.000 | 1,244.000 | 33.000 | 27.000 | 0.795 | gini | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
| 5 | 0.738 | 0.621 | 0.855 | 0.911 | 74.000 | 1,265.000 | 39.000 | 17.000 | 0.751 | entropy | SimpleImputer(strategy='most_frequent') | None | OneHotEncoder() |
# Search trials restricted to LogisticRegression, re-ranked within the subset.
results.to_formatted_dataframe(query='model == "LogisticRegression()"', include_rank=True)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | C | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.749 | 0.695 | 0.804 | 0.000 | SimpleImputer(strategy='median') | MinMaxScaler() | None | OneHotEncoder() |
| 2 | 0.749 | 0.695 | 0.803 | 0.000 | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | None | OneHotEncoder() |
| 3 | 0.749 | 0.694 | 0.804 | 0.000 | SimpleImputer() | MinMaxScaler() | PCA('mle') | OneHotEncoder() |
| 4 | 0.744 | 0.641 | 0.847 | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 5 | 0.702 | 0.659 | 0.745 | 0.000 | SimpleImputer(strategy='median') | MinMaxScaler() | None | CustomOrdinalEncoder() |
# Hyperparameter-search diagnostics.
# Cross-validated performance of each trial, faceted by model type.
results.plot_performance_across_trials(facet_by='model').show()
# Same view restricted to the best-performing model type (random forest).
results.plot_performance_across_trials(query='model == "RandomForestClassifier()"').show()
# Hyperparameter values sampled across the random-forest trials.
results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"').show()
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
#                             height=1000, width=1000).show()
# Score as a function of each numeric hyperparameter.
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
                                        height=800)
results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"').show()
results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"').show()
# Score vs a single hyperparameter, with two more parameters encoded as
# marker size and color.
results.plot_score_vs_parameter(
    query='model == "RandomForestClassifier()"',
    parameter='max_features',
    size='max_depth',
    color='encoder',
)
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )
# Fetch the last run's pickled model artifact and confirm the wrapped
# estimator is an sklearn Pipeline.
last_model = experiment.last_run.download_artifact(
    artifact_name='model/model.pkl', read_from=read_pickle)
print(type(last_model.model))
<class 'sklearn.pipeline.Pipeline'>
# Display the full repr of the downloaded model wrapper and its pipeline.
last_model
SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration',
'credit_amount',
'installment_commitment',
'residence_since',
'age',
'existing_credits',
'num_dependents']),
('n...
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history',
'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500,
random_state=42))]))In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration',
'credit_amount',
'installment_commitment',
'residence_since',
'age',
'existing_credits',
'num_dependents']),
('n...
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history',
'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500,
random_state=42))]))Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps...,
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history', 'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500, random_state=42))])ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment', 'residence_since',
'age', 'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps=[('encoder',
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status', 'credit_history',
'purpose', 'savings_status', 'employment',
'personal_status', 'other_parties',
'property_magnitude', 'other_payment_plans',
'housing', 'job', 'own_telephone',
'foreign_worker'])])['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer())
SimpleImputer()
SimpleImputer()
TransformerChooser()
TransformerChooser()
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore'))
OneHotEncoder(handle_unknown='ignore')
OneHotEncoder(handle_unknown='ignore')
RandomForestClassifier(n_estimators=500, random_state=42)
# Score the hold-out test set with the last run's model; the values look like
# positive-class scores in [0, 1] — TODO confirm predict() returns
# probabilities rather than class labels. First 10 shown below.
test_predictions = last_model.predict(X_test)
test_predictions[0:10]
array([0.388, 0.506, 0.724, 0.368, 0.056, 0.472, 0.076, 0.47 , 0.18 ,
0.23 ])
# Evaluate the last run's model on the hold-out test set.
# NOTE(review): the 0.37 score threshold appears hand-picked — confirm how it
# was chosen.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
evaluator.plot_actual_vs_predict_histogram()
evaluator.plot_confusion_matrix()
# Full metrics table, with dummy-classifier baselines for context.
evaluator.all_metrics_df(
    return_style=True,
    dummy_classifier_strategy=['prior', 'constant'],
    round_by=3,
)
| Score | Dummy (prior) | Dummy (constant) | Explanation | |
|---|---|---|---|---|
| AUC | 0.823 | 0.500 | 0.500 | Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier) |
| True Positive Rate | 0.746 | 0.000 | 1.000 | 74.6% of positive instances were correctly identified.; i.e. 44 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall |
| True Negative Rate | 0.801 | 1.000 | 0.000 | 80.1% of negative instances were correctly identified.; i.e. 113 "Negative Class" labels were correctly identified out of 141 instances |
| False Positive Rate | 0.199 | 0.000 | 1.000 | 19.9% of negative instances were incorrectly identified as positive; i.e. 28 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances |
| False Negative Rate | 0.254 | 1.000 | 0.000 | 25.4% of positive instances were incorrectly identified as negative; i.e. 15 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances |
| Positive Predictive Value | 0.611 | 0.000 | 0.295 | When the model claims an instance is positive, it is correct 61.1% of the time; i.e. out of the 72 times the model predicted "Positive Class", it was correct 44 times; a.k.a precision |
| Negative Predictive Value | 0.883 | 0.705 | 0.000 | When the model claims an instance is negative, it is correct 88.3% of the time; i.e. out of the 128 times the model predicted "Negative Class", it was correct 113 times |
| F1 Score | 0.672 | 0.000 | 0.456 | The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. |
| Precision/Recall AUC | 0.662 | 0.295 | 0.295 | Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats. |
| Accuracy | 0.785 | 0.705 | 0.295 | 78.5% of instances were correctly identified |
| Error Rate | 0.215 | 0.295 | 0.705 | 21.5% of instances were incorrectly identified |
| % Positive | 0.295 | 0.295 | 0.295 | 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class" |
| Total Observations | 200 | 200 | 200 | There are 200 total observations; i.e. sample size |
# ROC and precision/recall curves for the last-run model on the test set.
evaluator.plot_roc_auc_curve().show()
evaluator.plot_precision_recall_auc_curve().show()
# How threshold-dependent metrics change as the score threshold moves.
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
# Cumulative gain and lift by score percentile (styled table).
evaluator.calculate_lift_gain(return_style=True)
| Gain | Lift | |
|---|---|---|
| Percentile | ||
| 5 | 0.14 | 2.71 |
| 10 | 0.24 | 2.37 |
| 15 | 0.37 | 2.49 |
| 20 | 0.49 | 2.46 |
| 25 | 0.54 | 2.17 |
| 30 | 0.66 | 2.20 |
| 35 | 0.71 | 2.03 |
| 40 | 0.75 | 1.86 |
| 45 | 0.80 | 1.77 |
| 50 | 0.83 | 1.66 |
| 55 | 0.85 | 1.54 |
| 60 | 0.86 | 1.44 |
| 65 | 0.90 | 1.38 |
| 70 | 0.93 | 1.33 |
| 75 | 0.95 | 1.27 |
| 80 | 0.97 | 1.21 |
| 85 | 0.98 | 1.16 |
| 90 | 1.00 | 1.11 |
| 95 | 1.00 | 1.05 |
| 100 | 1.00 | 1.00 |
# Fetch the production model's pickled artifact and confirm the wrapped
# estimator is an sklearn Pipeline.
production_model = production_run.download_artifact(
    artifact_name='model/model.pkl', read_from=read_pickle)
print(type(production_model.model))
<class 'sklearn.pipeline.Pipeline'>
# Display the full repr of the production model wrapper and its pipeline.
production_model
SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration',
'credit_amount',
'installment_commitment',
'residence_since',
'age',
'existing_credits',
'num_dependents']),
('n...
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history',
'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500,
random_state=42))]))In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. SklearnModelWrapper(model=Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration',
'credit_amount',
'installment_commitment',
'residence_since',
'age',
'existing_credits',
'num_dependents']),
('n...
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history',
'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing',
'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500,
random_state=42))]))Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps...,
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status',
'credit_history', 'purpose',
'savings_status',
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(n_estimators=500, random_state=42))])ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer())),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser())]),
['duration', 'credit_amount',
'installment_commitment', 'residence_since',
'age', 'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps=[('encoder',
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore')))]),
['checking_status', 'credit_history',
'purpose', 'savings_status', 'employment',
'personal_status', 'other_parties',
'property_magnitude', 'other_payment_plans',
'housing', 'job', 'own_telephone',
'foreign_worker'])])['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer())
SimpleImputer()
SimpleImputer()
TransformerChooser()
TransformerChooser()
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=OneHotEncoder(handle_unknown='ignore'))
OneHotEncoder(handle_unknown='ignore')
OneHotEncoder(handle_unknown='ignore')
RandomForestClassifier(n_estimators=500, random_state=42)
# Score the hold-out test set with the production model; note this rebinds
# `test_predictions`, replacing the last-run model's scores. The values look
# like positive-class scores in [0, 1] — TODO confirm predict() returns
# probabilities rather than class labels.
test_predictions = production_model.predict(X_test)
test_predictions[0:10]
array([0.388, 0.506, 0.724, 0.368, 0.056, 0.472, 0.076, 0.47 , 0.18 ,
0.23 ])
# Evaluate the production model on the same hold-out test set so it can be
# compared directly against the last run's model above.
# NOTE(review): the 0.37 score threshold appears hand-picked — confirm how it
# was chosen.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
evaluator.plot_actual_vs_predict_histogram()
evaluator.plot_confusion_matrix()
# Full metrics table, with dummy-classifier baselines for context.
evaluator.all_metrics_df(
    return_style=True,
    dummy_classifier_strategy=['prior', 'constant'],
    round_by=3,
)
| Score | Dummy (prior) | Dummy (constant) | Explanation | |
|---|---|---|---|---|
| AUC | 0.823 | 0.500 | 0.500 | Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier) |
| True Positive Rate | 0.746 | 0.000 | 1.000 | 74.6% of positive instances were correctly identified.; i.e. 44 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall |
| True Negative Rate | 0.801 | 1.000 | 0.000 | 80.1% of negative instances were correctly identified.; i.e. 113 "Negative Class" labels were correctly identified out of 141 instances |
| False Positive Rate | 0.199 | 0.000 | 1.000 | 19.9% of negative instances were incorrectly identified as positive; i.e. 28 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances |
| False Negative Rate | 0.254 | 1.000 | 0.000 | 25.4% of positive instances were incorrectly identified as negative; i.e. 15 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances |
| Positive Predictive Value | 0.611 | 0.000 | 0.295 | When the model claims an instance is positive, it is correct 61.1% of the time; i.e. out of the 72 times the model predicted "Positive Class", it was correct 44 times; a.k.a precision |
| Negative Predictive Value | 0.883 | 0.705 | 0.000 | When the model claims an instance is negative, it is correct 88.3% of the time; i.e. out of the 128 times the model predicted "Negative Class", it was correct 113 times |
| F1 Score | 0.672 | 0.000 | 0.456 | The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. |
| Precision/Recall AUC | 0.662 | 0.295 | 0.295 | Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats. |
| Accuracy | 0.785 | 0.705 | 0.295 | 78.5% of instances were correctly identified |
| Error Rate | 0.215 | 0.295 | 0.705 | 21.5% of instances were incorrectly identified |
| % Positive | 0.295 | 0.295 | 0.295 | 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class" |
| Total Observations | 200 | 200 | 200 | There are 200 total observations; i.e. sample size |
# ROC and precision/recall curves for the production model on the test set.
evaluator.plot_roc_auc_curve().show()
evaluator.plot_precision_recall_auc_curve().show()
# How threshold-dependent metrics change as the score threshold moves.
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
# Cumulative gain and lift by score percentile (styled table).
evaluator.calculate_lift_gain(return_style=True)
| Gain | Lift | |
|---|---|---|
| Percentile | ||
| 5 | 0.14 | 2.71 |
| 10 | 0.24 | 2.37 |
| 15 | 0.37 | 2.49 |
| 20 | 0.49 | 2.46 |
| 25 | 0.54 | 2.17 |
| 30 | 0.66 | 2.20 |
| 35 | 0.71 | 2.03 |
| 40 | 0.75 | 1.86 |
| 45 | 0.80 | 1.77 |
| 50 | 0.83 | 1.66 |
| 55 | 0.85 | 1.54 |
| 60 | 0.86 | 1.44 |
| 65 | 0.90 | 1.38 |
| 70 | 0.93 | 1.33 |
| 75 | 0.95 | 1.27 |
| 80 | 0.97 | 1.21 |
| 85 | 0.98 | 1.16 |
| 90 | 1.00 | 1.11 |
| 95 | 1.00 | 1.05 |
| 100 | 1.00 | 1.00 |